#!/usr/bin/python

# This script scrapes the category list from The Pirate Bay's RSS index page
# and builds the RSS feed URL for each category.
import urllib2
import re
import urllib

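"""
Developer notes. This is a bare string literal: nothing in it is executed,
so the names assigned below are NOT defined at module level.

Sample markup from the site:

<li><a href="//rss.thepiratebay.se/201">Movies</a></li>

					<option value="101">Music</option>


Local proxy addresses:

GOAGENT = '127.0.1:8087'
WCHAMBER = '127.0.1:1998'

Snippet for checking by hand that a proxy works:

proxy = urllib2.ProxyHandler({'http': '127.0.0.1:1998'})
opener = urllib2.build_opener(proxy)
urllib2.install_opener(opener)
f = urllib2.urlopen('http://www.google.com')
print f.read()

"""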
def grabURL(proxySetting):
	"""Scrape the category options from the RSS index page.

	proxySetting: the string 'none' to fetch without an explicit proxy, or
	the 'host:port' address of an HTTP proxy to fetch through, e.g.

	GOAGENT = '127.0.1:8087'
	WCHAMBER = '127.0.1:1998'

	Returns a list of (option, rssURL) tuples, one for each
	<option value="N">Name</option> line found on the page except the
	first, e.g. ('Music', 'http://rss.thepiratebay.se/101').  The list is
	empty when fewer than two such lines are found.

	Side effect: when a proxy is given, it is installed as the process-wide
	urllib2 opener.
	"""
	RSSURL = 'http://thepiratebay.se/rss'
	PREFIX = 'http://rss.thepiratebay.se/'
	# Compiled once instead of being looked up again for every line.
	optionPattern = re.compile(r'<option value="(\d+)">(\w+\s*\w+)</option>')

	if proxySetting == 'none':
		print('not using proxy')
		response = urllib.urlopen(RSSURL)
	else:
		print('using proxy:  %s' % (proxySetting,))
		proxy = urllib2.ProxyHandler({'http': proxySetting})
		opener = urllib2.build_opener(proxy)
		# Kept global on purpose: later urllib2.urlopen calls made by other
		# code in the same process keep going through this proxy.
		urllib2.install_opener(opener)
		response = urllib2.urlopen(RSSURL)

	retTupList = []
	try:
		# Iterate the response object itself so that both branches walk the
		# page line by line.  Iterating the result of read() would walk it
		# one character at a time and the pattern could never match.
		for line in response:
			m = optionPattern.search(line)
			if m:
				print("%s -> %s " % (m.group(1), m.group(2)))
				retTupList.append((m.group(2), PREFIX + m.group(1)))
	finally:
		response.close()

	# The first matched option is the one with value 0 (per the original
	# note), so it is dropped.  Slicing, unlike `del retTupList[0]`, does not
	# raise IndexError when nothing matched.
	return retTupList[1:]

def main():
	"""Fetch and print the category list through the local proxy on port 1998."""
	# WCHAMBER only ever appeared inside string literals in this file, so the
	# bare name raised NameError at call time.  Bind it here instead.
	# '127.0.0.1' is the canonical spelling of the '127.0.1' shorthand.
	WCHAMBER = '127.0.0.1:1998'
	grabURL(WCHAMBER)

# Script entry point: run main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
	main()
	